/*    Copyright 2010 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.discovery.objectives;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import mosdi.discovery.ExpectedClumpSizeBoundCalculator;
import mosdi.discovery.ObjectiveFunction;
import mosdi.discovery.PoissonBoundCalculator;
import mosdi.discovery.ScoreAndPValue;
import mosdi.discovery.TextModelIupacBounds;
import mosdi.discovery.MotifFinder.SearchState;
import mosdi.distributions.PoissonDistribution;
import mosdi.fa.Alphabet;
import mosdi.fa.CDFA;
import mosdi.fa.DFAFactory;
import mosdi.fa.DoublePatternExpectationCalculator;
import mosdi.fa.FiniteMemoryTextModel;
import mosdi.fa.ForwardPatternExpectationCalculator;
import mosdi.fa.GeneralizedString;
import mosdi.fa.PatternExpectationCalculator;
import mosdi.index.SuffixTree;
import mosdi.paa.ClumpSizeCalculator;
import mosdi.util.BitArray;
import mosdi.util.Iupac;
import mosdi.util.IupacStringConstraints;

public class OccurrenceCountObjective implements ObjectiveFunction {
	private FiniteMemoryTextModel textModel;
	private int[] occurrenceCountAnnotation;
	// private int[] maxMatchCountAnnotation;
	private SearchState state;
	private PatternExpectationCalculator expectationCalculator;
	/** IupacStringConstraints[i] contains constraints for
	 *  of pattern[i..]. */
	private IupacStringConstraints[] remainingConstraints;
	/** Number of positions where a pattern could be placed 
	  * (e.g. n-p.length+1 for a string of length n). */
	private int effectiveLength;
	private PoissonBoundCalculator poissonBoundCalculator;
	/** Maximal number of occurrences for which a check is performed. */ 
	private int maxCheckOccurrences;
	/** Maximal upper bound for clump sizefor which a check is performed. */ 
	private final int maxCheckClumpSizeBounds = 100;
	private double maxExpectation;
	private double goodEnoughClumpSizeBound;
	/** clumpSizeUpperBounds[i] contains an upper bound for the clump size
	 *  of pattern[0..i]. */
	private double[] clumpSizeUpperBounds;
	private TextModelIupacBounds textModelBounds;
	private TextModelIupacBounds backwardTextModelBounds;
	private ExpectedClumpSizeBoundCalculator clumpSizeBounder;
	
	public OccurrenceCountObjective(FiniteMemoryTextModel textModel, int[] occurrenceCountAnnotation, int[] maxMatchCountAnnotation, int effectiveLength, boolean useDetailedBounds) {
		this.textModel = textModel;
		this.textModelBounds = new TextModelIupacBounds(textModel, useDetailedBounds);
		this.backwardTextModelBounds = null;
		this.clumpSizeBounder = new ExpectedClumpSizeBoundCalculator(textModelBounds);
		this.occurrenceCountAnnotation = occurrenceCountAnnotation;
		// this.maxMatchCountAnnotation = maxMatchCountAnnotation;
		this.effectiveLength = effectiveLength;
		this.maxExpectation = Double.POSITIVE_INFINITY;
		this.goodEnoughClumpSizeBound = 1.01;
	}

	@Override
	public void initialize(SearchState searchState) {
		this.state = searchState;
		// if (maxMatchCountAnnotation!=null) state.requestMultiplicity();
		clumpSizeUpperBounds = new double[state.getPattern().length];
		Arrays.fill(clumpSizeUpperBounds, Double.POSITIVE_INFINITY);
		remainingConstraints = new IupacStringConstraints[state.getPattern().length+1];
		remainingConstraints[0] = state.getIupacConstraints();
		if (remainingConstraints[0]==null) throw new IllegalArgumentException("Need IUPAC constraints!");
		if (state.isReverseConsidered()) {
			if (backwardTextModelBounds==null) {
				backwardTextModelBounds = new TextModelIupacBounds(textModel.reverseTextModel(),false);
			}
			expectationCalculator = new DoublePatternExpectationCalculator(textModel, state.getPattern(),textModelBounds,backwardTextModelBounds);
		} else {
			expectationCalculator = new ForwardPatternExpectationCalculator(textModel, state.getPattern(),textModelBounds);	
		}
		double minExpectation = (expectationCalculator.getLowerBound(0,state.getIupacConstraints())/maxCheckClumpSizeBounds) * effectiveLength;
		double maxExpectation = Math.min(expectationCalculator.getUpperBound(0,state.getIupacConstraints()) * effectiveLength, this.maxExpectation);
		this.maxCheckOccurrences = Math.min((int)(effectiveLength/20.0), new PoissonDistribution(maxExpectation).pvaluePrecisionLimit());
		poissonBoundCalculator = new PoissonBoundCalculator(minExpectation, maxExpectation, maxCheckOccurrences, 1000, false);
	}
	
	@Override
	public void updatePattern(int newCharacter, int leftmostChangedPosition) {
		int[] pattern = state.getPattern();
		remainingConstraints[leftmostChangedPosition+1] = remainingConstraints[leftmostChangedPosition].minus(pattern[leftmostChangedPosition]);
		expectationCalculator.setPatternPosition(leftmostChangedPosition, pattern[leftmostChangedPosition]);
		clumpSizeUpperBounds[leftmostChangedPosition] = (leftmostChangedPosition>0)?clumpSizeUpperBounds[leftmostChangedPosition-1]:Double.POSITIVE_INFINITY;
		// check if we already have an useful upper bound 
		if (clumpSizeUpperBounds[leftmostChangedPosition]>goodEnoughClumpSizeBound) {
			// if not: compute bound
			int prefixLength = leftmostChangedPosition+1;
			double newBound = clumpSizeBounder.upperBound(pattern, prefixLength, remainingConstraints[prefixLength], state.isReverseConsidered());
			if (newBound>maxCheckClumpSizeBounds) newBound = Double.POSITIVE_INFINITY;
			clumpSizeUpperBounds[prefixLength-1] = newBound;
//			if (Log.getLogLevel() == Log.Level.DEBUG) {
//				String s = Alphabet.getIupacAlphabet().buildString(Arrays.copyOf(pattern, leftmostChangedPosition+1));
//				Log.printf(Log.Level.DEBUG, "Prefix: %s, exp. clump size <=%f\n", s, clumpSizeUpperBounds[leftmostChangedPosition]);
//			}
		}
	}
	
	public void setGoodEnoughClumpSizeBound(double goodEnoughClumpSizeBound) {
		this.goodEnoughClumpSizeBound = goodEnoughClumpSizeBound;
	}

	@Override
	public double pValueLowerBound(int prefixLength, int[] nodes) {
		// Step 1) Compute upper bound for the number of matches
		int matches = 0;
		for (int node : nodes) {
			//if (maxMatchCountAnnotation==null) {
			matches+=occurrenceCountAnnotation[node];
			//} else {
			//	matches+=Math.min(occurrenceCountAnnotation[node], maxMatchCountAnnotation[node]*state.getMultiplicityLeft());
			//}
		}
		if (matches>maxCheckOccurrences) return 0.0;
		// Log.printf(Log.Level.DEBUG, ">>check>> %s %f %d\n", Alphabet.getIupacAlphabet().buildString(Arrays.copyOf(pattern,prefixLength)),clumpSizeUpperBounds[prefixLength-1], matches);
		if (Double.isInfinite(clumpSizeUpperBounds[prefixLength-1])) return 0.0;
		// Step 3) Compute bound for p-value
		double minExpectation = expectationCalculator.getLowerBound(prefixLength, remainingConstraints[prefixLength]) * effectiveLength;
		if (minExpectation>maxExpectation) return 1.0;
		double minExpectedClumpNumber = minExpectation/clumpSizeUpperBounds[prefixLength-1];
		double pvalueLowerBound = poissonBoundCalculator.getLowerBound(minExpectedClumpNumber, matches);
		return pvalueLowerBound;
	}

	/** If set, motifs with an expectation above this bound are considered to 
	 *  p-value 1.0. */
	public void setMaxExpectation(double maxExpectation) {
		this.maxExpectation = maxExpectation;
	}

	@Override
	public ScoreAndPValue evaluate(int[] nodes, double pValueThreshold) {
		int[] pattern = state.getPattern();
		// Step 1) Count matches
		int matches = 0;
		for (int node : nodes) matches+=occurrenceCountAnnotation[node];
		
		// Step 2) Compute expectation
		double expectation = expectationCalculator.getPrefixExpectation(pattern.length);
		expectation *= effectiveLength;
		// Log.printf(Log.Level.DEBUG, ">>eval>>  %s %f %d\n", Alphabet.getIupacAlphabet().buildString(pattern),expectation, matches);
		if (expectation>maxExpectation) return new ScoreAndPValue();
		
//		// Step 3) Fast check based on clump size estimate
//		if ((pValueThreshold<1.0) && (matches<=maxCheckOccurrences)) {
//			double clumpSizeUpperBound = clumpSizeUpperBounds[pattern.length-1];
//			if (!Double.isInfinite(clumpSizeUpperBound)) {
//				double minExpectedClumpNumber = expectation/clumpSizeUpperBound;
//				if (poissonBoundCalculator.getLowerBound(minExpectedClumpNumber, matches)>pValueThreshold) return null;
//			}
//		}
		
		// build automaton
		List<GeneralizedString> genStringList = new ArrayList<GeneralizedString>(1);
		String s = Alphabet.getIupacAlphabet().buildString(pattern);
		Alphabet dnaAlphabet = Alphabet.getDnaAlphabet();
		// Log.println(Log.Level.DEBUG, "Evaluating candidate: " + s);
		genStringList.add(Iupac.toGeneralizedString(s));
		if (state.isReverseConsidered()) {
			String sReverse = Iupac.reverseComplementary(s);
			if (s.compareTo(sReverse)>0) return new ScoreAndPValue();
			genStringList.add(Iupac.toGeneralizedString(sReverse));
		}
		CDFA cdfa = DFAFactory.build(dnaAlphabet, genStringList, 50000);
		// calculate clump size distribution
		ClumpSizeCalculator csc = new ClumpSizeCalculator(textModel, cdfa, pattern.length);
		double[] clumpSizeDist = csc.clumpSizeDistribution(8, 1e-30);
		// calculate expected clump size
		double expectedClumpSize = 0.0;
		for (int i=1; i<clumpSizeDist.length; ++i) {
			expectedClumpSize+=clumpSizeDist[i]*i;
		}
		double lambda = expectation/expectedClumpSize;
		PoissonDistribution poissonDist = new PoissonDistribution(lambda);
		double pValue = poissonDist.compoundPoissonPValue(clumpSizeDist, matches);
		if (pValue>0.0) return new ScoreAndPValue(matches, -Math.log(pValue));
		else return new ScoreAndPValue(matches, -poissonDist.logCompoundPoissonPValue(clumpSizeDist, matches));
	}

	@Override
	public ScoreAndPValue staticEvaluate(int[] pattern, boolean considerReverse, SuffixTree suffixTree, double pValueThreshold) {
		BitArray[] generalizedAlphabet = Iupac.asGeneralizedAlphabet();
		int[] nodes = suffixTree.walk(pattern, generalizedAlphabet);
		// Step 1) Count matches
		int matches = 0;
		for (int node : nodes) matches+=occurrenceCountAnnotation[node];
		
		// Step 2) Compute expectation
		PatternExpectationCalculator pec = null;
		if (considerReverse) {
			pec = new DoublePatternExpectationCalculator(textModel, pattern, textModelBounds, backwardTextModelBounds);
		} else {
			pec = new ForwardPatternExpectationCalculator(textModel, pattern, textModelBounds);	
		}
		double expectation = pec.getExpectation();
		int effectiveLength = suffixTree.getTotalSequenceLength() - suffixTree.getSequenceCount()*(pattern.length-1);
		// if suffix tree contains both, forward and reverse strand, correct here:
		if (considerReverse) effectiveLength/=2;
		expectation *= effectiveLength;
		
		// Step 3) Fast check based on clump size estimate
		if (pValueThreshold<1.0) {
			ExpectedClumpSizeBoundCalculator b = new ExpectedClumpSizeBoundCalculator(textModelBounds);
			double clumpSizeUpperBound = b.upperBound(pattern, considerReverse);
			if (!Double.isInfinite(clumpSizeUpperBound)) {
				double minExpectedClumpNumber = expectation/clumpSizeUpperBound;
				PoissonDistribution pd = new PoissonDistribution(minExpectedClumpNumber);
				if (pd.getTailProbability(matches)>pValueThreshold) return null;
			}
		}
		// build automaton
		CDFA cdfa = DFAFactory.build(Alphabet.getDnaAlphabet(), Iupac.toGeneralizedStrings(pattern, considerReverse), 50000);
		// calculate clump size distribution
		ClumpSizeCalculator csc = new ClumpSizeCalculator(textModel, cdfa, pattern.length);
		double[] clumpSizeDist = csc.clumpSizeDistribution(8, 1e-30);
		// calculate expected clump size
		double expectedClumpSize = 0.0;
		for (int i=1; i<clumpSizeDist.length; ++i) {
			expectedClumpSize+=clumpSizeDist[i]*i;
		}
		double lambda = expectation/expectedClumpSize;
		PoissonDistribution poissonDist = new PoissonDistribution(lambda);
		double pValue = poissonDist.compoundPoissonPValue(clumpSizeDist, matches);
		double minusLogPValue = (pValue>0.0)?-Math.log(pValue):-poissonDist.logCompoundPoissonPValue(clumpSizeDist, matches);
		if (minusLogPValue<-Math.log(pValueThreshold)) return null;
		return new ScoreAndPValue(matches, minusLogPValue);
	}

	
	@Override
	public String getName() {
		return "occurrence-count";
	}

}
